GAP analyses of shark and ray range overlaps with Marine Protected Areas
Code
# Convert species dataframe to spatial pointsspecies_points <-SpatialPointsDataFrame(coords = puvsp_marine[, c("lon", "lat")], data = puvsp_marine, proj4string =CRS(projection(mpa_NT)))# Extract MPA values for each species pointmpa_NT_values <- raster::extract(mpa_NT, species_points)# Add MPA values to the species dataframepuvsp_marine$mpa_NT_present <- mpa_NT_values# Function to calculate percentage of range in MPAcalculate_mpa_percentage <-function(species_column, mpa_column) { species_presence <- species_column ==1# Assuming 1 indicates species presence total_range <-sum(species_presence, na.rm =TRUE) range_in_mpa <-sum(species_presence & mpa_column ==1, na.rm =TRUE)if (total_range ==0) {return(NA) # Return NA if the species is not present anywhere } percentage <- (range_in_mpa / total_range) *100return(percentage)}# Apply function to all species columns for both MPA typesspecies_columns <-4:(ncol(puvsp_marine) -2) # Assuming species columns start at 4mpa_NT_percentages <-sapply(puvsp_marine[, species_columns], function(x) calculate_mpa_percentage(x, puvsp_marine$mpa_NT_present))# Create a dataframe with resultsresults <-data.frame(species =names(mpa_NT_percentages),percentage_in_NT_MPA = mpa_NT_percentages)# Remove any NA resultsresults <- results[!is.na(results$percentage_in_NT_MPA), ]# Sort results by percentage in no-take MPAs (descending)results <- results[order(-results$percentage_in_NT_MPA), ]# Display top 10 species#print(head(results, 10))# Summary statisticssummary_stats <-data.frame(Statistic =c("Min", "1st Qu.", "Median", "Mean", "3rd Qu.", "Max", "SD"),Value =c(summary(results$percentage_in_NT_MPA)[1],summary(results$percentage_in_NT_MPA)[2],summary(results$percentage_in_NT_MPA)[3],summary(results$percentage_in_NT_MPA)[4],summary(results$percentage_in_NT_MPA)[5],summary(results$percentage_in_NT_MPA)[6],sd(results$percentage_in_NT_MPA) ))kable(summary_stats, caption ="Summary Statistics for No-Take MPAs", digits =2)
Summary Statistics for No-Take MPAs
Statistic
Value
Min.
Min
0.00
1st Qu.
1st Qu.
0.00
Median
Median
0.78
Mean
Mean
2.58
3rd Qu.
3rd Qu.
2.83
Max.
Max
50.00
SD
5.40
Code
# Save results to CSVwrite.csv(results, here::here("Data","species_mpa_coverage_NT.csv"), row.names =FALSE)
Map of mean % range covered
Code
# Convert results to a named vector for easier lookupspecies_NT_percentages <-setNames(results$percentage_in_NT_MPA, results$species)# Create a new dataframe to store resultsgrid_data <- puvsp_marine %>% dplyr::select(lon, lat) %>%mutate(species_count =0,sum_percentages =0 )# For each species, add its coverage percentage to cells where it's presentfor (sp_name innames(species_NT_percentages)) {# Find the column index in the original data col_idx <-which(names(puvsp_marine) == sp_name)if (length(col_idx) >0) {# Find cells where this species is present present_cells <-which(puvsp_marine[[col_idx]] ==1)if (length(present_cells) >0) {# Add to species count grid_data$species_count[present_cells] <- grid_data$species_count[present_cells] +1# Add the species' NT percentage to the sum grid_data$sum_percentages[present_cells] <- grid_data$sum_percentages[present_cells] + species_NT_percentages[sp_name] } }}# Calculate mean percentage for each grid cellgrid_data <- grid_data %>%mutate(mean_NT_coverage =ifelse(species_count >0, sum_percentages / species_count, NA)) %>%filter(!is.na(mean_NT_coverage))# Exclude high seas grid cells ---- # Create high seas template rastertemplate_raster <-rast(ext(High_seas), resolution =0.5)# Rasterize high seas polygonhighseas_raster <-rasterize(vect(High_seas), template_raster, field =1, background =0)# Convert grid_data to terra SpatVector for extractiongrid_points <-vect(as.data.frame(grid_data), geom =c("lon", "lat"), crs =crs(highseas_raster))# Extract values from highseas rasterhighseas_values <- terra::extract(highseas_raster, grid_points)# Add highseas classification back to the dataframegrid_data$is_highseas <- highseas_values[, 2]grid_data$is_highseas[is.na(grid_data$is_highseas)] <-0# Set NA to 0 (continental)# Filter to keep only continental waters (is_highseas = 0)grid_data_continental <- grid_data %>%filter(is_highseas ==0)# Define the projectionmcbryde_thomas_2 <-"+proj=mbt_s"# Convert grid data to sf object and project (continental waters only)grid_data_sf <- grid_data_continental %>%st_as_sf(coords =c("lon", "lat"), crs =4326) %>%st_transform(crs = mcbryde_thomas_2)# Get and project landland_projected <-ne_countries(scale ="medium", returnclass ="sf") %>%st_transform(crs = mcbryde_thomas_2)# Create the globe borderglobe_bbox <-rbind(c(-180, -90), c(-180, 90), c(180, 90), c(180, -90), c(-180, -90))globe_border <-st_polygon(list(globe_bbox)) %>%st_sfc(crs =4326) %>%st_sf(data.frame(rgn ='globe', geom = .)) %>% smoothr::densify(max_distance =0.5) %>%st_transform(crs = mcbryde_thomas_2)# Create base thememy_theme <-theme_minimal() +theme(legend.position ="bottom",legend.direction ="horizontal",legend.box ="vertical",legend.margin =margin(t =20, r =0, b =0, l =0),legend.title =element_text(margin =margin(b =10)),panel.background =element_rect(fill ="white", color =NA),plot.background =element_rect(fill ="white", color =NA),panel.grid =element_blank(),axis.text =element_blank(), # Add this lineaxis.ticks =element_blank() # Add this line )# Calculate the data range for better color scalingmax_value <-max(grid_data_continental$mean_NT_coverage, na.rm =TRUE)color_breaks <-seq(0, ceiling(max_value /10) *10, by =2)# Map with built-in transformationnt_map_projected <-ggplot() +geom_sf(data = grid_data_sf, aes(color = mean_NT_coverage), size =0.5, alpha =0.7) +geom_sf(data = land_projected, fill ="lightgrey", color ="lightgrey", size =0.1) +geom_sf(data = globe_border, fill =NA, color ="black", size =0.5) +scale_color_viridis_c(option ="viridis",name ="Mean % range in no-take MPAs",trans ="sqrt", # Use "log10" for log scalebreaks = color_breaks,guide =guide_colorbar(barwidth =20, barheight =0.5, title.position ="top", title.hjust =0.5) ) +coord_sf() +labs(# title = "Mean Percentage of Species Range in No-Take MPAs",# subtitle = "Continental Waters Only (Square Root Scale)",x =NULL, y =NULL ) +# annotate("text", x = -Inf, y = Inf, label = "(A)", # hjust = -1, vjust = 2, size = 6, fontface = "bold") + my_theme# Print the projected mapprint(nt_map_projected)
# Convert grid_data_continental to sf for spatial operationsgrid_sf <- grid_data_continental %>%st_as_sf(coords =c("lon", "lat"), crs =4326)#=====================# Calculate by Realm#=====================# Extract realms from meow_ecosrealms <- meow_ecos %>%group_by(REALM) %>%summarize(geometry =st_union(geometry)) %>%ungroup()# Spatial join with realmsgrid_realms <-st_join(grid_sf, realms)# Calculate mean coverage by realmrealm_stats <- grid_realms %>%st_drop_geometry() %>%group_by(REALM) %>%summarize(mean_NT_coverage =mean(mean_NT_coverage, na.rm =TRUE),median_NT_coverage =median(mean_NT_coverage, na.rm =TRUE),sd_NT_coverage =sd(mean_NT_coverage, na.rm =TRUE),n_cells =n() ) %>%arrange(desc(mean_NT_coverage))#======================# Calculate by Ecoregion#======================# Spatial join with ecoregionsgrid_ecoregions <-st_join(grid_sf, meow_ecos)# Calculate mean coverage by ecoregionecoregion_stats <- grid_ecoregions %>%st_drop_geometry() %>%group_by(ECOREGION) %>%summarize(PROVINCE =first(PROVINCE),REALM =first(REALM),mean_NT_coverage =mean(mean_NT_coverage, na.rm =TRUE),median_NT_coverage =median(mean_NT_coverage, na.rm =TRUE),sd_NT_coverage =sd(mean_NT_coverage, na.rm =TRUE),n_cells =n() ) %>%arrange(desc(mean_NT_coverage))#======================# Calculate by EEZ#======================# Fix potential invalid geometries in EEZ dataeez_valid <-st_make_valid(eez)# Spatial join with EEZgrid_eez <-st_join(grid_sf, eez_valid %>% dplyr::select(TERRITORY1, SOVEREIGN1, ISO_TER1, MRGID_TER1))# Calculate mean coverage by EEZeez_stats <- grid_eez %>%st_drop_geometry() %>%group_by(TERRITORY1, SOVEREIGN1) %>%summarize(ISO_TER1 =first(ISO_TER1),MRGID_TER1 =first(MRGID_TER1),mean_NT_coverage =mean(mean_NT_coverage, na.rm =TRUE),median_NT_coverage =median(mean_NT_coverage, na.rm =TRUE),sd_NT_coverage =sd(mean_NT_coverage, na.rm =TRUE),n_cells =n() ) %>%arrange(desc(mean_NT_coverage))# Print summary tablescat("Top 10 Realms by Mean No-Take MPA Coverage:\n")
Top 10 Realms by Mean No-Take MPA Coverage:
Code
print(head(realm_stats, 10))
# A tibble: 10 × 5
REALM mean_NT_coverage median_NT_coverage sd_NT_coverage n_cells
<chr> <dbl> <dbl> <dbl> <int>
1 Temperate Austral… 1.79 1.79 NA 2109
2 Central Indo-Paci… 1.78 1.78 NA 9292
3 Southern Ocean 1.73 1.73 NA 1504
4 Tropical Atlantic 1.60 1.60 NA 4510
5 Western Indo-Paci… 1.51 1.51 NA 4680
6 Tropical Eastern … 1.49 1.49 NA 1357
7 Eastern Indo-Paci… 1.44 1.44 NA 6303
8 <NA> 1.16 1.16 NA 5817
9 Temperate South A… 1.15 1.15 NA 2432
10 Temperate Souther… 1.15 1.15 NA 694
Code
cat("\nTop 10 Ecoregions by Mean No-Take MPA Coverage:\n")
Top 10 Ecoregions by Mean No-Take MPA Coverage:
Code
print(head(ecoregion_stats, 10))
# A tibble: 10 × 7
ECOREGION PROVINCE REALM mean_NT_coverage median_NT_coverage sd_NT_coverage
<chr> <chr> <chr> <dbl> <dbl> <dbl>
1 Heard and … Subanta… Sout… 5.96 5.96 NA
2 Central an… Northea… Cent… 4.97 4.97 NA
3 Ross Sea Contine… Sout… 4.90 4.90 NA
4 Coral Sea Tropica… Cent… 4.52 4.52 NA
5 Kerguelen … Subanta… Sout… 3.78 3.78 NA
6 Cortezian Warm Te… Temp… 3.39 3.39 NA
7 Torres Str… Northea… Cent… 3.17 3.17 NA
8 Arnhem Coa… Sahul S… Cent… 3.16 3.16 NA
9 Tweed-More… East Ce… Temp… 3.10 3.10 NA
10 Gulf of Pa… Sahul S… Cent… 2.75 2.75 NA
# ℹ 1 more variable: n_cells <int>
Code
cat("\nTop 10 EEZs by Mean No-Take MPA Coverage:\n")
Top 10 EEZs by Mean No-Take MPA Coverage:
Code
print(head(eez_stats, 10))
# A tibble: 10 × 8
# Groups: TERRITORY1 [10]
TERRITORY1 SOVEREIGN1 ISO_TER1 MRGID_TER1 mean_NT_coverage median_NT_coverage
<chr> <chr> <chr> <dbl> <dbl> <dbl>
1 Heard and… Australia HMD 8631 4.77 4.77
2 Kerguélen France ATF 8630 3.32 3.32
3 Sint-Maar… Netherlan… SXM 21802 2.83 2.83
4 Quitasueñ… Colombia <NA> 5765 2.83 2.83
5 Saint-Bar… France BLM 19099 2.80 2.80
6 Saba Netherlan… BES 19095 2.74 2.74
7 Collectiv… France MAF 8688 2.71 2.71
8 Belize Belize BLZ 8681 2.60 2.60
9 Australia Australia AUS 2147 2.55 2.55
10 Saint Kit… Saint Kit… KNA 8644 2.51 2.51
# ℹ 2 more variables: sd_NT_coverage <dbl>, n_cells <int>
Code
#======================# Create visualizations#======================# 1. Choropleth map of ecoregions# Join stats back to spatial datameow_ecos_stats <- meow_ecos %>%left_join(ecoregion_stats, by ="ECOREGION")# Plot ecoregions mapeco_map <-ggplot() +geom_sf(data = meow_ecos_stats, aes(fill = mean_NT_coverage), color =NA) +geom_sf(data =ne_countries(scale ="medium", returnclass ="sf"), fill ="lightgrey", color ="darkgrey", size =0.1) +scale_fill_viridis_c(name ="Mean % range in no-take MPAs",na.value ="grey90",trans ="sqrt",guide =guide_colorbar(barwidth =20, barheight =0.5,title.position ="top", title.hjust =0.5) ) +theme_minimal() +labs(#title = "Mean No-Take MPA Coverage by Marine Ecoregion",x =NULL, y =NULL ) +theme(legend.position ="bottom",panel.grid =element_blank(),axis.text =element_blank(),axis.ticks =element_blank() )# Save ecoregion mapggsave(here("Outputs", "ecoregion_NT_coverage_map.png"), eco_map, width =12, height =8, dpi =300)# 2. Choropleth map of EEZs# Join stats back to spatial dataeez_stats_map <- eez_valid %>%left_join(eez_stats, by =c("TERRITORY1", "SOVEREIGN1"))# Plot EEZ mapeez_map <-ggplot() +geom_sf(data = eez_stats_map, aes(fill = mean_NT_coverage), color =NA) +geom_sf(data =ne_countries(scale ="medium", returnclass ="sf"), fill ="lightgrey", color ="darkgrey", size =0.1) +scale_fill_viridis_c(name ="Mean % range in No-Take MPAs",na.value ="grey90",trans ="sqrt",guide =guide_colorbar(barwidth =20, barheight =0.5,title.position ="top", title.hjust =0.5) ) +theme_minimal() +labs(title ="Mean No-Take MPA Coverage by Exclusive Economic Zone (EEZ)",x =NULL, y =NULL ) +theme(legend.position ="bottom",panel.grid =element_blank() )# Save EEZ mapggsave(here("Outputs", "eez_NT_coverage_map.png"), eez_map, width =12, height =8, dpi =300)# 3. Bar plots for top regions# Top 20 Ecoregionstop_eco_plot <-ggplot(head(ecoregion_stats, 20), aes(x =reorder(ECOREGION, mean_NT_coverage), y = mean_NT_coverage)) +geom_bar(stat ="identity", fill ="steelblue") +geom_errorbar(aes(ymin = mean_NT_coverage - sd_NT_coverage, ymax = mean_NT_coverage + sd_NT_coverage), width =0.2) +coord_flip() +labs(# title = "Top 20 Marine Ecoregions by Mean No-Take MPA Coverage",x =NULL,y ="Mean % range in no-take MPAs" ) +theme_minimal() +theme(axis.text.y =element_text(size =8) )# Save top ecoregions plotggsave(here("Outputs", "top_ecoregions_NT_coverage.png"), top_eco_plot, width =10, height =8, dpi =300)# Bottom 20 Ecoregionsbottom_eco_plot <-ggplot(tail(ecoregion_stats, 20), aes(x =reorder(ECOREGION, mean_NT_coverage), y = mean_NT_coverage)) +geom_bar(stat ="identity", fill ="coral") +geom_errorbar(aes(ymin = mean_NT_coverage - sd_NT_coverage, ymax = mean_NT_coverage + sd_NT_coverage), width =0.2) +coord_flip() +labs(# title = "Bottom 20 Marine Ecoregions by Mean No-Take MPA Coverage",x =NULL,y ="Mean % range in no-take MPAs" ) +theme_minimal() +theme(axis.text.y =element_text(size =8) )# Save bottom ecoregions plotggsave(here("Outputs", "bottom_ecoregions_NT_coverage.png"), bottom_eco_plot, width =10, height =8, dpi =300)# Top 20 EEZstop_eez_plot <-ggplot(head(eez_stats, 20), aes(x =reorder(TERRITORY1, mean_NT_coverage), y = mean_NT_coverage)) +geom_bar(stat ="identity", fill ="steelblue") +geom_errorbar(aes(ymin = mean_NT_coverage - sd_NT_coverage, ymax = mean_NT_coverage + sd_NT_coverage), width =0.2) +coord_flip() +labs(title ="Top 20 EEZs by Mean No-Take MPA Coverage",x =NULL,y ="Mean % Range in No-Take MPAs" ) +theme_minimal() +theme(axis.text.y =element_text(size =8) )# Save top EEZs plotggsave(here("Outputs", "top_eezs_NT_coverage.png"), top_eez_plot, width =10, height =8, dpi =300)
Null model of MPA placement within EEZs
Null model of MPA placement and overlaps with the range of sharks and rays
Code
# Create marine mask from bathymetry (areas < 0)marine_mask <- Bathy <0# Resample marine mask to match MPA raster resolutionmarine_mask_resampled <- raster::resample(marine_mask, mpa_NT, method ="ngb")# Ensure marine_mask_resampled is binary (0 or 1)marine_mask_resampled <- raster::reclassify(marine_mask_resampled, c(-Inf, 0.5, 0, 0.5, Inf, 1))# Function to create a raster mask of EEZs with unique values for each countrycreate_eez_mask <-function(template_raster) {# Ensure EEZ has a unique identifier for each country eez$country_id <-as.numeric(as.factor(eez$SOVEREIGN1))# Rasterize the EEZ shapefile using the country_id eez_raster <-rasterize(eez, template_raster, field ="country_id")return(eez_raster)}# Updated create_random_mpa function to randomize within each country's EEZcreate_random_mpa <-function(template_raster, marine_mask, eez_mask) {# Ensure template_raster is binary template_raster <- raster::reclassify(template_raster, c(-Inf, 0.5, 0, 0.5, Inf, 1))# Combine marine_mask and eez_mask combined_mask <- marine_mask * (eez_mask >0)# Get unique country IDs country_ids <-unique(eez_mask[!is.na(eez_mask)]) country_ids <- country_ids[country_ids >0]# Initialize random raster random_raster <- template_raster random_raster[] <-NAfor (country_id in country_ids) {# Create mask for current country country_mask <- eez_mask == country_id# Count original marine MPA cells within current country's EEZ original_mpa_cells <-sum(template_raster[] ==1& country_mask[] &!is.na(combined_mask[]), na.rm =TRUE)# Get all valid cells for current country (marine areas within EEZ) valid_cells <-which(country_mask[] &!is.na(combined_mask[])) total_valid_cells <-length(valid_cells)if (original_mpa_cells >0&& total_valid_cells >0) {if (original_mpa_cells > total_valid_cells) {warning(paste("More MPA cells than valid marine cells for country ID", country_id, ". Adjusting MPA cell count.")) original_mpa_cells <- total_valid_cells }# Randomly select valid cells to be MPAs for current country mpa_cells <-sample(valid_cells, size =min(original_mpa_cells, total_valid_cells), replace =FALSE)# Update random raster random_raster[valid_cells] <-0 random_raster[mpa_cells] <-1 } }return(random_raster)}# Calculate MPA percentage functioncalculate_mpa_percentage <-function(species_values, mpa_values) { total_cells <-sum(!is.na(species_values)) mpa_cells <-sum(mpa_values ==1&!is.na(species_values), na.rm =TRUE) percentage <- (mpa_cells / total_cells) *100return(percentage)}# Main analysis functionrun_random_mpa_analysis <-function(species_data, mpa_raster, marine_mask, eez_mask, n_iterations =100) {# Convert species dataframe to spatial points species_points <- sp::SpatialPointsDataFrame(coords = species_data[, c("lon", "lat")], data = species_data, proj4string = sp::CRS(raster::projection(mpa_raster)))# Prepare results storage species_columns <-4:(ncol(species_data) -1) # Adjust if needed all_results <-matrix(nrow =length(species_columns), ncol = n_iterations)rownames(all_results) <-names(species_data)[species_columns]# Use pblapply for parallel processing with a progress bar all_results <-pblapply(1:n_iterations, function(i) {set.seed(i) # Set seed for reproducibility random_mpa <-create_random_mpa(mpa_raster, marine_mask, eez_mask) random_mpa_values <- raster::extract(random_mpa, species_points)sapply(species_data[, species_columns], function(x) calculate_mpa_percentage(x, random_mpa_values)) }, cl =detectCores() -1) # Use one less than available cores all_results <-do.call(cbind, all_results)# Calculate mean and standard deviation for each species mean_percentages <-rowMeans(all_results, na.rm =TRUE) sd_percentages <-apply(all_results, 1, sd, na.rm =TRUE) results <-data.frame(species =rownames(all_results),mean_percentage = mean_percentages,sd_percentage = sd_percentages )return(results)}# Function to process resultsprocess_results <-function(results, mpa_type) {# Sort results by mean_percentage in descending order results <- results[order(-results$mean_percentage), ]# Calculate summary statistics summary_stats <-summary(results$mean_percentage)# Write results to CSVwrite.csv(results, here::here("Outputs",paste0("species_random_", mpa_type, "_coverage.csv")), row.names =FALSE)# Return a list containing the processed datareturn(list(top_10 =head(results, 10),summary_stats = summary_stats,all_results = results ))}# Create the EEZ mask with unique country IDseez_mask <-create_eez_mask(mpa_NT)# Run the analysis with the country-specific EEZ constrainttryCatch({ random_results_NT <-run_random_mpa_analysis(puvsp_marine, mpa_NT, marine_mask_resampled, eez_mask, n_iterations =100)print("Analysis for No-Take MPAs completed successfully.")}, error =function(e) {print(paste("Error in No-Take MPAs analysis:", e$message))})
[1] "Analysis for No-Take MPAs completed successfully."
Code
# Process results for both MPA typesprocessed_results <-list()if (exists("random_results_NT")) { processed_results[["No-take MPAs"]] <-process_results(random_results_NT, "No-take MPAs")}
Compare results
Compare results between the MPA network and the Null model of MPA placement
Code
# Load actual MPA coverage dataactual_coverage <-read.csv(here::here("Data", "species_mpa_coverage_NT.csv"))colnames(actual_coverage)[2] <-c("NT")# Load random MPA coverage resultsrandom_NT <-read.csv(here::here("Outputs", "species_random_No-take MPAs_coverage.csv"))# Reshape actual coverage data to long format (NT only)actual_long <- actual_coverage %>%select(species, NT) %>%rename(actual_percentage = NT)# Function to merge and compare actual vs random datacompare_coverage <-function(actual_data, random_data) { comparison <- actual_data %>%left_join(random_data, by ="species") %>%mutate(difference = actual_percentage - mean_percentage,z_score = (actual_percentage - mean_percentage) / sd_percentage)return(comparison)}# Create comparison dataframe for NT MPAscomparison_NT <-compare_coverage(actual_long, random_NT)# Function to summarize and print resultssummarize_results <-function(comparison_data) { over_represented <-mean(comparison_data$difference >0, na.rm =TRUE) *100 under_represented <-mean(comparison_data$difference <0, na.rm =TRUE) *100 equally_represented <-100- over_represented - under_represented summary_df <-data.frame(Representation =c("Over-represented", "Under-represented", "Equally represented"),Percentage =round(c(over_represented, under_represented, equally_represented), 2) )kable(summary_df, format ="html", col.names =c("Representation", "Percentage (%)"),caption ="Summary for No-Take MPAs") %>%kable_styling(bootstrap_options =c("striped", "hover", "condensed", "responsive"),full_width =FALSE) %>%column_spec(2, width ="100px") %>%row_spec(0, bold =TRUE) %>%add_header_above(c(" "=1, "Species"=1)) %>%footnote(general ="Percentages may not sum to 100% due to rounding.")}# Summarize resultssummarize_results(comparison_NT)
Top 10 significantly over/under-represented species in No-Take MPAs
Species
Actual %
Random %
Difference
SES
Over-represented
Pseudoginglymostoma brevicaudatum
0.98
0.00
0.98
Inf
Narke capensis
0.78
0.00
0.78
Inf
Trigonognathus kabeyai
44.27
2.60
41.67
32.80
Apristurus spongiceps
28.74
2.44
26.30
26.41
Parmaturus albimarginatus
5.00
0.03
4.97
19.90
Parmaturus albipenis
5.56
0.06
5.50
14.07
Etmopterus villosus
34.21
2.29
31.92
13.91
Mobula tarapacana
1.15
0.97
0.18
10.35
Hemiscyllium galei
36.36
1.36
35.00
9.97
Carcharhinus galapagensis
6.30
2.97
3.33
9.56
Under-represented
Carcharhinus obscurus
3.13
6.05
-2.92
-13.82
Hypogaleus hyugaensis
5.69
16.07
-10.38
-13.69
Sphyrna lewini
1.58
2.99
-1.41
-13.66
Carcharias taurus
2.04
6.09
-4.05
-13.49
Sphyrna mokarran
1.41
2.58
-1.17
-12.15
Carcharhinus plumbeus
1.82
3.32
-1.50
-11.26
Sphyrna zygaena
0.99
2.68
-1.69
-11.18
Etmopterus lucifer
1.92
5.25
-3.33
-11.18
Amblyraja hyperborea
1.01
2.37
-1.35
-10.75
Heterodontus portusjacksoni
4.04
21.28
-17.25
-10.72
Note:
Total significantly over-represented species: 133
Total significantly under-represented species: 365
Code
# Stacked bar chart# Create a summary dataframe for plottingsummary_NT <-data.frame(Representation =c("Under-represented", "Equally represented", "Over-represented"),Percentage =c(mean(comparison_NT$difference <0, na.rm =TRUE) *100,100-mean(comparison_NT$difference >0, na.rm =TRUE) *100-mean(comparison_NT$difference <0, na.rm =TRUE) *100,mean(comparison_NT$difference >0, na.rm =TRUE) *100 ))# Order the factor levels in reverse for proper stackingsummary_NT$Representation <-factor(summary_NT$Representation,levels =c("Over-represented", "Equally represented", "Under-represented"))# Create the horizontal stacked barplotstacked_bar_chart <-ggplot(summary_NT, aes(x =1, y = Percentage, fill = Representation)) +geom_bar(stat ="identity", position ="stack", width =0.5) +scale_fill_manual(values =c("Over-represented"="#56B4E9","Equally represented"="#999999","Under-represented"="#E69F00"),breaks =c("Under-represented", "Equally represented", "Over-represented")) +geom_text(aes(label =sprintf("%.2f%%", Percentage)),position =position_stack(vjust =0.5),color ="black", size =4) +coord_flip() +theme_minimal() +labs(x =NULL,y ="Percentage of species (%)" ) +scale_y_continuous(expand =c(0, 0), limits =c(0, 100)) +theme(legend.position ="top",legend.title =element_blank(),axis.text.y =element_blank(),axis.ticks.y =element_blank(),panel.grid.major.y =element_blank(),panel.grid.minor.y =element_blank() )# Display the plotprint(stacked_bar_chart)
Breakdown of under-represented species
Code
# Identify under-represented species (where actual coverage is less than random)under_represented_NT <- comparison_NT %>%filter(difference <0) %>%arrange(difference) %>% dplyr::select(species, actual_percentage, mean_percentage, difference, z_score)# Count the number of under-represented speciesn_under_NT <-nrow(under_represented_NT)# Save the list of under-represented species to CSV filewrite.csv(under_represented_NT, here::here("Outputs", "under_represented_species_NT_MPAs.csv"), row.names =FALSE)# Create summary data frame with additional informationunder_rep_NT_summary <- under_represented_NT %>%mutate(protection_gap = mean_percentage - actual_percentage,representation_ratio = actual_percentage / mean_percentage) %>%arrange(desc(protection_gap))# Save the enhanced summary datawrite.csv(under_rep_NT_summary, here::here("Outputs", "under_represented_species_NT_MPAs_summary.csv"), row.names =FALSE)## Compare with IUCN data # Define the function to prepare IUCN dataprepare_iucn_data <-function(species_data, species_col) {# Rename the species column in the input data to match species_data_renamed <- species_data %>% dplyr::rename(species =all_of(species_col))# Ensure we're only joining based on the species name species_data_slim <- species_data_renamed %>% dplyr::select(species) results_IUCN <-inner_join(IUCN, species_data_slim, by ="species") %>%mutate(IUCN_status =case_when( iucn_GE ==0~"LC", iucn_GE ==1~"NT", iucn_GE ==2~"VU", iucn_GE ==3~"EN", iucn_GE ==4~"CR",TRUE~"Unknown" )) %>%filter(IUCN_status !="Unknown")return(results_IUCN)}# Get species lists for each categoryall_species <- actual_coverage %>% dplyr::select(species)under_rep_NT_species <- under_represented_NT %>% dplyr::select(species)# Use the prepare_iucn_data function to get IUCN statuses for each group separatelyall_species_iucn <-prepare_iucn_data(all_species, "species")under_rep_NT_iucn <-prepare_iucn_data(under_rep_NT_species, "species")# Function to summarize IUCN status distributionsummarize_iucn <-function(data, title) {# Count species by IUCN status iucn_counts <- data %>%group_by(IUCN_status) %>%summarise(count =n(),.groups ="drop" ) %>%mutate(percentage = count /sum(count) *100,title = title )# Order IUCN statuses correctly iucn_counts$IUCN_status <-factor(iucn_counts$IUCN_status,levels =c("LC", "NT", "VU", "EN", "CR"))return(iucn_counts)}# Get IUCN summaries for each datasetall_iucn_summary <-summarize_iucn(all_species_iucn, "All Species")under_NT_iucn_summary <-summarize_iucn(under_rep_NT_iucn, "Under-represented (No-take MPAs)")# Print summaries to checkprint(all_iucn_summary)
# A tibble: 5 × 4
IUCN_status count percentage title
<fct> <int> <dbl> <chr>
1 CR 79 8.14 All Species
2 EN 114 11.7 All Species
3 LC 496 51.1 All Species
4 NT 109 11.2 All Species
5 VU 173 17.8 All Species
Code
print(under_NT_iucn_summary)
# A tibble: 5 × 4
IUCN_status count percentage title
<fct> <int> <dbl> <chr>
1 CR 41 6.58 Under-represented (No-take MPAs)
2 EN 71 11.4 Under-represented (No-take MPAs)
3 LC 333 53.5 Under-represented (No-take MPAs)
4 NT 68 10.9 Under-represented (No-take MPAs)
5 VU 110 17.7 Under-represented (No-take MPAs)
Code
# Combine summariescombined_iucn_summary <-bind_rows( all_iucn_summary, under_NT_iucn_summary)# Create a bar plot comparing IUCN distributionsiucn_plot_NT <-ggplot(combined_iucn_summary, aes(x = IUCN_status, y = percentage, fill = title)) +geom_bar(stat ="identity", position ="dodge") +geom_text(aes(label =paste0(round(percentage, 2), "%")),position =position_dodge(width =0.9),vjust =-0.5,size =3) +scale_fill_manual(values =c("All Species"="grey70","Under-represented (No-take MPAs)"="darkred")) +labs(x ="IUCN Red List threat status",y ="Percentage of species (%)",fill ="" ) +theme_classic() +theme(legend.position ="bottom",panel.background =element_rect(fill ="white", color =NA),plot.background =element_rect(fill ="white", color =NA),plot.title =element_text(hjust =0.5, face ="bold"),plot.subtitle =element_text(hjust =0.5) )# Print the plotprint(iucn_plot_NT)
Code
# Save the plotggsave(here::here("Outputs", "under_represented_species_iucn_comparison_NT.png"), iucn_plot_NT, width =10, height =6, dpi =300)# Statistical test: Chi-square test to determine if there are significant differences# in IUCN status distribution between all species and under-represented species# Prepare contingency tablesprepare_contingency <-function(summary_df1, summary_df2) {# Ensure all IUCN categories are present in both datasets all_categories <-c("LC", "NT", "VU", "EN", "CR")# Fill in missing categories with zerofor (cat in all_categories) {if (!cat %in% summary_df1$IUCN_status) { summary_df1 <-bind_rows(summary_df1,data.frame(IUCN_status = cat, count =0,percentage =0, title =unique(summary_df1$title))) }if (!cat %in% summary_df2$IUCN_status) { summary_df2 <-bind_rows(summary_df2,data.frame(IUCN_status = cat, count =0,percentage =0, title =unique(summary_df2$title))) } }# Order and extract counts df1_ordered <- summary_df1 %>%arrange(factor(IUCN_status, levels = all_categories)) %>%pull(count) df2_ordered <- summary_df2 %>%arrange(factor(IUCN_status, levels = all_categories)) %>%pull(count)# Create contingency tablereturn(rbind(df1_ordered, df2_ordered))}# Perform chi-square testcontingency_NT <-prepare_contingency(all_iucn_summary, under_NT_iucn_summary)rownames(contingency_NT) <-c("All Species", "Under-represented (No-take MPAs)")colnames(contingency_NT) <-c("LC", "NT", "VU", "EN", "CR")# Print contingency tablecat("Contingency Table - No-take MPAs:\n")
Contingency Table - No-take MPAs:
Code
print(contingency_NT)
LC NT VU EN CR
All Species 496 109 173 114 79
Under-represented (No-take MPAs) 333 68 110 71 41
Code
cat("\n")
Code
# Run chi-square testchi_test_NT <-chisq.test(contingency_NT)# Print resultscat("Chi-square Test - No-take MPAs vs. All Species:\n")
# Create a summary table with the IUCN distributioniucn_summary_table <- combined_iucn_summary %>%pivot_wider(id_cols = IUCN_status,names_from = title,values_from =c(count, percentage) ) %>%arrange(factor(IUCN_status, levels =c("LC", "NT", "VU", "EN", "CR")))# Save the summary tableswrite.csv(iucn_summary_table, here::here("Outputs", "under_represented_species_iucn_summary_NT.csv"),row.names =FALSE)
Map results of the GAP analyses
Map mean Standardised Effect Sizes of MPA coverage
Code
# Get land polygons from rnaturalearthland <-ne_countries(scale ="medium", returnclass ="sf")# Create template rastertemplate_raster <-rast(ext(High_seas), resolution =0.5)# Rasterize high seas polygonhighseas_raster <-rasterize(vect(High_seas), template_raster, field =1, background =0)# Plot to verify#plot(highseas_raster,# main = "High Seas (1) vs Continental Waters (0)",# col = c("lightblue", "darkblue"))# For No-take MPAsdifference_sp <- comparison_NT[, c(1, 3, 6)]# Calculate cleaned SES for each species in comparison_NTcomparison_NT <- comparison_NT %>%mutate(SES_clean =case_when( sd_percentage ==0& actual_percentage == mean_percentage ~0, sd_percentage ==0& actual_percentage > mean_percentage ~3, # Cap at +3 sd_percentage ==0& actual_percentage < mean_percentage ~-3, # Cap at -3TRUE~ (actual_percentage - mean_percentage) / sd_percentage ))# Reshape puvsp_marine from wide to long formatpuvsp_long <- puvsp_marine %>%pivot_longer(cols =-c(id, lon, lat), names_to ="species", values_to ="presence") %>%filter(!is.na(presence) & presence ==1) # Only keep rows where species is present# Join SES data to puvsp_longcombined_data_clean <- puvsp_long %>%left_join(comparison_NT %>%select(species, SES_clean), by =c("species"="species"))# Calculate mean SES per cell with cleaned valuesmean_ses_per_cell_clean <- combined_data_clean %>%group_by(id, lon, lat) %>%summarise(mean_SES =mean(SES_clean, na.rm =TRUE), .groups ="drop")# Check the cleaned results#summary(mean_ses_per_cell_clean$mean_SES)# Convert to terra SpatVector for extractionses_points_NT <-vect(as.data.frame(mean_ses_per_cell_clean),geom =c("lon", "lat"),crs =crs(highseas_raster))# Extract values from highseas rasterhighseas_values_NT <- terra::extract(highseas_raster, ses_points_NT)# Add highseas classification back to the dataframemean_ses_per_cell_clean$is_highseas <- highseas_values_NT[, 2]mean_ses_per_cell_clean$is_highseas[is.na(mean_ses_per_cell_clean$is_highseas)] <-0# Set NA to 0 (continental)# Convert to sf for mappingmean_ses_sf_NT <-st_as_sf(mean_ses_per_cell_clean, coords =c("lon", "lat"), crs =4326)# Filter for continental waters onlycontinental_points_NT <- mean_ses_sf_NT[mean_ses_sf_NT$is_highseas ==0, ]# Define the projectionmcbryde_thomas_2 <-"+proj=mbt_s"# Project datasetscontinental_projected_NT <-st_transform(continental_points_NT, crs = mcbryde_thomas_2)land_projected <-ne_countries(scale ="medium", returnclass ="sf") %>%st_transform(crs = mcbryde_thomas_2)# Create the globe borderglobe_bbox <-rbind(c(-180, -90), c(-180, 90),c(180, 90), c(180, -90), c(-180, -90))globe_border <-st_polygon(list(globe_bbox)) %>%st_sfc(crs =4326) %>%st_sf(data.frame(rgn ='globe', geom = .)) %>% smoothr::densify(max_distance =0.5) %>%st_transform(crs = mcbryde_thomas_2)# Create base thememy_theme <-theme_minimal() +theme(legend.position ="bottom",legend.direction ="horizontal",legend.box ="vertical",legend.margin =margin(t =20, r =0, b =0, l =0),legend.title =element_text(margin =margin(b =10)),panel.background =element_rect(fill ="white", color =NA),plot.background =element_rect(fill ="white", color =NA),panel.grid =element_blank(),axis.text =element_blank(),axis.ticks =element_blank() )# Create plot for continental waters - No-take MPAs with McBryde-Thomas projectioncontinental_plot_NT <-ggplot() +geom_sf(data = continental_projected_NT, aes(color = mean_SES), size =0.5, alpha =0.7) +geom_sf(data = land_projected, fill ="lightgrey", color ="lightgrey", size =0.1) +geom_sf(data = globe_border, fill =NA, color ="black", size =0.5) +scale_color_gradient2(low ="#b2182b", mid ="#f0f0f0", high ="#2166ac", midpoint =0,# Note: The color scale is manually limited to [-3, 3] to improve visual contrast.# Values outside this range are squished to the nearest limit (using oob = scales::squish),# which prevents extreme SES values (e.g., -10 or +5) from dominating the color scale.limits =c(-3, 3),oob = scales::squish,name ="Mean SES",guide =guide_colorbar(barwidth =20, barheight =0.5,title.position ="top", title.hjust =0.5) ) +coord_sf() +labs(x =NULL, y =NULL ) + my_theme# Print the plotprint(continental_plot_NT)
Code
# Save the plotggsave(here::here("Outputs", "SES_map_continental_NT.png"), continental_plot_NT,width =10,height =6,dpi =300,bg ="white")#======================# Calculate SES by Ecoregion#======================# Convert mean_ses_per_cell to sf for spatial operationsses_grid_sf <- mean_ses_per_cell_clean %>%st_as_sf(coords =c("lon", "lat"), crs =4326)# Spatial join with ecoregionsses_grid_ecoregions <-st_join(ses_grid_sf, meow_ecos)# Calculate mean SES by ecoregion (excluding areas outside ecoregions)ecoregion_ses_stats <- ses_grid_ecoregions %>%st_drop_geometry() %>%filter(!is.na(ECOREGION)) %>%# Exclude areas outside ecoregionsgroup_by(ECOREGION) %>%summarize(PROVINCE =first(PROVINCE),REALM =first(REALM),mean_SES =mean(mean_SES, na.rm =TRUE),median_SES =median(mean_SES, na.rm =TRUE),sd_SES =sd(mean_SES, na.rm =TRUE),n_cells =n(),.groups ="drop" ) %>%arrange(desc(mean_SES))# Print summarycat("Top 10 Ecoregions by Mean SES:\n")
Top 10 Ecoregions by Mean SES:
Code
print(head(ecoregion_ses_stats, 10))
# A tibble: 10 × 7
ECOREGION PROVINCE REALM mean_SES median_SES sd_SES n_cells
<chr> <chr> <chr> <dbl> <dbl> <dbl> <int>
1 Cocos-Keeling/Christmas Is… Java Tr… Cent… 2.91 2.91 NA 272
2 East Antarctic Dronning Ma… Contine… Sout… 2.77 2.77 NA 13
3 Ross Sea Contine… Sout… 2.77 2.77 NA 36
4 Chagos Central… West… 2.47 2.47 NA 207
5 Phoenix/Tokelau/Northern C… Central… East… 2.30 2.30 NA 812
6 East Antarctic Wilkes Land Contine… Sout… 2.28 2.28 NA 2
7 Hawaii Hawaii East… 2.23 2.23 NA 924
8 Line Islands Central… East… 2.21 2.21 NA 487
9 Ogasawara Islands Tropica… Cent… 2.15 2.15 NA 404
10 Cocos Islands Tropica… Trop… 2.11 2.11 NA 109
Code
cat("\nBottom 10 Ecoregions by Mean SES:\n")
Bottom 10 Ecoregions by Mean SES:
Code
print(tail(ecoregion_ses_stats, 10))
# A tibble: 10 × 7
ECOREGION PROVINCE REALM mean_SES median_SES sd_SES n_cells
<chr> <chr> <chr> <dbl> <dbl> <dbl> <int>
1 North and East Barents Sea Arctic Arct… -7.09 -7.09 NA 1828
2 Bouvet Island Subanta… Sout… -7.98 -7.98 NA 1
3 Antarctic Peninsula Scotia … Sout… -8.20 -8.20 NA 258
4 Baltic Sea Norther… Temp… -8.68 -8.68 NA 228
5 Chukchi Sea Arctic Arct… -8.74 -8.74 NA 390
6 Beaufort Sea - continental… Arctic Arct… -8.78 -8.78 NA 2
7 Weddell Sea Contine… Sout… -8.87 -8.87 NA 174
8 White Sea Arctic Arct… -9.91 -9.91 NA 62
9 Amundsen/Bellingshausen Sea Contine… Sout… -10.7 -10.7 NA 298
10 Peter the First Island Subanta… Sout… -10.7 -10.7 NA 5
Code
#======================# Create SES ecoregion map#======================# Join SES stats back to spatial datameow_ecos_ses_stats <- meow_ecos %>%left_join(ecoregion_ses_stats, by ="ECOREGION")# Plot ecoregions SES mapses_eco_map <-ggplot() +geom_sf(data = meow_ecos_ses_stats, aes(fill = mean_SES), color =NA) +geom_sf(data = land, fill ="darkgrey", color ="darkgrey", size =0.1) +scale_fill_gradient2(name ="Mean SES",low ="#b2182b", mid ="#f0f0f0", high ="#2166ac", midpoint =0,na.value ="grey90",guide =guide_colorbar(barwidth =20, barheight =0.5,title.position ="top", title.hjust =0.5) ) +coord_sf(xlim =c(-180, 180), ylim =c(-90, 90), expand =FALSE) +theme_minimal() +labs(x =NULL, y =NULL ) +theme(legend.position ="bottom",panel.grid =element_blank(),axis.text =element_blank(),axis.ticks =element_blank() )# Print and save the mapprint(ses_eco_map)
Code
ggsave(here("Outputs", "ecoregion_SES_map_NT_clean.png"), ses_eco_map, width =12, height =8, dpi =300)# Top 20 Ecoregions by Mean SEStop_ses_plot <-ggplot(head(ecoregion_ses_stats, 20),aes(x =reorder(ECOREGION, mean_SES), y = mean_SES)) +geom_bar(stat ="identity", fill ="steelblue") +geom_errorbar(aes(ymin = mean_SES - sd_SES,ymax = mean_SES + sd_SES),width =0.2) +coord_flip() +labs(x =NULL,y ="Mean SES" ) +theme_minimal() +theme(axis.text.y =element_text(size =8) )# Bottom 20 Ecoregions by Mean SESbottom_ses_plot <-ggplot(tail(ecoregion_ses_stats, 20),aes(x =reorder(ECOREGION, mean_SES), y = mean_SES)) +geom_bar(stat ="identity", fill ="coral") +geom_errorbar(aes(ymin = mean_SES - sd_SES,ymax = mean_SES + sd_SES),width =0.2) +coord_flip() +labs(x =NULL,y ="Mean SES" ) +theme_minimal() +theme(axis.text.y =element_text(size =8) )# Print and save plotsprint(top_ses_plot)
Category IUCN Predicted_Coverage
1 0 LC 3.01
2 1 NT 2.91
3 2 VU 2.81
4 3 EN 2.72
5 4 CR 2.63
Code
# Average decrease per categoryavg_decrease_nt <-mean(diff(mpa_pred_nt))print(paste("Average decrease per threat category:", round(avg_decrease_nt, 2), "%"))
[1] "Average decrease per threat category: -0.1 %"
Code
#======================# Ridge Plot with Points - No-take MPAs#======================# Define IUCN colors and orderiucn_colors <-c("CR"="#D81E05","EN"="#FC7F3F","VU"="#FEC748","NT"="#58AFFF","LC"="#38AB38")iucn_order <-c("LC", "NT", "VU", "EN", "CR")# Prepare no-take MPA data for plottingnotake_mpa_iucn <- results_IUCN_filtered %>% dplyr::select(species, IUCN_status, NT) %>%rename(protection = NT)# Create the ridge plot with colored pointsrdplot_points <-ggplot(notake_mpa_iucn, aes(x =factor(IUCN_status, levels = iucn_order), y = protection)) +geom_jitter(aes(color = IUCN_status),width =0.1, size =1.5, alpha =0.6) +# Mean and SD statisticsstat_summary(fun = mean, geom ="point", size =3, color ="black") +stat_summary(fun.data =function(x) { mean_val <-mean(x, na.rm =TRUE) sd_val <-sd(x, na.rm =TRUE)return(data.frame(y = mean_val, ymin =max(0, mean_val - sd_val),ymax = mean_val + sd_val)) },geom ="errorbar", width =0.1, color ="black") +labs(x ="IUCN Red List threat status", y ="(%) Range within no-take MPAs") +theme_minimal() +theme(plot.title =element_text(hjust =0.5, face ="bold"),legend.position ="none",panel.grid.major.x =element_blank() ) +scale_x_discrete(limits = iucn_order) +scale_color_manual(values = iucn_colors) +scale_y_sqrt(limits =c(0, 50),breaks =seq(0, 50, 10),labels =c("0", "10", "20", "30", "40", ""))print(rdplot_points)
Code
# Add white background to the plotrdplot_points_white <- rdplot_points +theme(plot.background =element_rect(fill ="white", color =NA),panel.background =element_rect(fill ="white", color =NA) )# Save PNG with white backgroundggsave(filename = here::here("Outputs", "notake_mpa_protection_points.png"),plot = rdplot_points_white,width =6,height =5,dpi =300,bg ="white")# Summary statistics for no-take MPAssummary_stats_nt <- results_IUCN_filtered %>%group_by(IUCN_status) %>%summarise(count =n(),mean =mean(NT, na.rm =TRUE),median =median(NT, na.rm =TRUE),sd =sd(NT, na.rm =TRUE),.groups ="drop" ) %>%arrange(factor(IUCN_status, levels = iucn_order))print("Summary statistics for No-take MPAs:")
[1] "Summary statistics for No-take MPAs:"
Code
print(summary_stats_nt)
# A tibble: 5 × 5
IUCN_status count mean median sd
<chr> <int> <dbl> <dbl> <dbl>
1 LC 514 3.63 0.961 7.07
2 NT 118 2.03 0.878 3.21
3 VU 179 2.07 0.872 4.05
4 EN 121 1.58 0.834 2.59
5 CR 83 1.41 0.826 1.88
# Define the output path using `here`output_path <- here::here("Outputs", "Fig_1_combined_points_3.png")# Save the final plot using `ggsave`ggsave(filename = output_path, # File path for savingplot = final_plot_combined, # The combined plotwidth =8, # Width of the output (in inches)height =17.35, # Height of the output (in inches)dpi =300, # Resolution (300 DPI for high quality)bg ="white", )# Display the saved image in the Quarto documentknitr::include_graphics(here::here("Outputs", "Fig_1_combined_points_3.png"))
Source Code
---title: "Manuscript_gap_analysis"author: "Théophile L. Mouton"date: "November 5, 2025"format: html: toc: true toc-location: right css: custom.css output-file: "Manuscript_gap_analysis.html" self-contained: true code-fold: true code-tools: trueeditor: visualexecute: warning: false message: false echo: true---# Load the libraries```{r}# Load necessary librarieslibrary(raster)library(here)library(sp)library(dplyr)library(tidyr)library(ggplot2)library(dplyr)library(sf)library(rnaturalearth)library(rnaturalearthdata)library(smoothr)library(terra) # For raster operationslibrary(raster)library(parallel)library(pbapply)library(knitr)library(kableExtra)library(patchwork)library(gridExtra)library(tidyverse)library(ggridges)library(betareg)library(margins)library(ggpubr)library(knitr)```# Load the data```{r}# Species presence grid load(here::here("Data", "GAP analyses", "puvsp_marine.Rdata"))# No-take MPA raster layermpa_NT <- raster::raster(here::here("Data", "GAP analyses", "mpa_NT_binary.tif"))# High seas shapefile # High seas shapefile High_seas =st_read(here::here("Data", "World_High_Seas_v2_20241010", "High_Seas_v2.shp"), quiet =TRUE)# Ecoregion shapefilemeow_ecos <-st_read(here("Data", "Shapefiles", "meow_ecos", "meow_ecos.shp"), quiet =TRUE)# EEZ shapefile eez <-st_read(here("Data", "World_EEZ_v12_20231025", "eez_v12.shp"), quiet =TRUE)# Bathymetry raster layerBathy <-raster(here::here("Data", "bathymetry-0.1deg-adjusted.tif"))# IUCN statusesIUCN <-readRDS(here::here("Data", "GAP analyses", "sharks_iucn_final.RDS"))IUCN$species <-gsub("_", " ", IUCN$Species)```# GAP analyses## GAP analyses of shark and ray range overlaps with Marine Protected Areas```{r}# Convert species dataframe to spatial pointsspecies_points <-SpatialPointsDataFrame(coords = puvsp_marine[, c("lon", "lat")], data = puvsp_marine, proj4string =CRS(projection(mpa_NT)))# Extract MPA values for each species pointmpa_NT_values <- raster::extract(mpa_NT, species_points)# Add MPA values to the species dataframepuvsp_marine$mpa_NT_present <- mpa_NT_values# Function to calculate percentage of range in MPAcalculate_mpa_percentage <-function(species_column, mpa_column) { species_presence <- species_column ==1# Assuming 1 indicates species presence total_range <-sum(species_presence, na.rm =TRUE) range_in_mpa <-sum(species_presence & mpa_column ==1, na.rm =TRUE)if (total_range ==0) {return(NA) # Return NA if the species is not present anywhere } percentage <- (range_in_mpa / total_range) *100return(percentage)}# Apply function to all species columns for both MPA typesspecies_columns <-4:(ncol(puvsp_marine) -2) # Assuming species columns start at 4mpa_NT_percentages <-sapply(puvsp_marine[, species_columns], function(x) calculate_mpa_percentage(x, puvsp_marine$mpa_NT_present))# Create a dataframe with resultsresults <-data.frame(species =names(mpa_NT_percentages),percentage_in_NT_MPA = mpa_NT_percentages)# Remove any NA resultsresults <- results[!is.na(results$percentage_in_NT_MPA), ]# Sort results by percentage in no-take MPAs (descending)results <- results[order(-results$percentage_in_NT_MPA), ]# Display top 10 species#print(head(results, 10))# Summary statisticssummary_stats <-data.frame(Statistic =c("Min", "1st Qu.", "Median", "Mean", "3rd Qu.", "Max", "SD"),Value =c(summary(results$percentage_in_NT_MPA)[1],summary(results$percentage_in_NT_MPA)[2],summary(results$percentage_in_NT_MPA)[3],summary(results$percentage_in_NT_MPA)[4],summary(results$percentage_in_NT_MPA)[5],summary(results$percentage_in_NT_MPA)[6],sd(results$percentage_in_NT_MPA) ))kable(summary_stats, caption ="Summary Statistics for No-Take MPAs", digits =2)# Save results to CSVwrite.csv(results, here::here("Data","species_mpa_coverage_NT.csv"), row.names =FALSE)```# Map of mean % range covered```{r}# Convert results to a named vector for easier lookupspecies_NT_percentages <-setNames(results$percentage_in_NT_MPA, results$species)# Create a new dataframe to store resultsgrid_data <- puvsp_marine %>% dplyr::select(lon, lat) %>%mutate(species_count =0,sum_percentages =0 )# For each species, add its coverage percentage to cells where it's presentfor (sp_name innames(species_NT_percentages)) {# Find the column index in the original data col_idx <-which(names(puvsp_marine) == sp_name)if (length(col_idx) >0) {# Find cells where this species is present present_cells <-which(puvsp_marine[[col_idx]] ==1)if (length(present_cells) >0) {# Add to species count grid_data$species_count[present_cells] <- grid_data$species_count[present_cells] +1# Add the species' NT percentage to the sum grid_data$sum_percentages[present_cells] <- grid_data$sum_percentages[present_cells] + species_NT_percentages[sp_name] } }}# Calculate mean percentage for each grid cellgrid_data <- grid_data %>%mutate(mean_NT_coverage =ifelse(species_count >0, sum_percentages / species_count, NA)) %>%filter(!is.na(mean_NT_coverage))# Exclude high seas grid cells ---- # Create high seas template rastertemplate_raster <-rast(ext(High_seas), resolution =0.5)# Rasterize high seas polygonhighseas_raster <-rasterize(vect(High_seas), template_raster, field =1, background =0)# Convert grid_data to terra SpatVector for extractiongrid_points <-vect(as.data.frame(grid_data), geom =c("lon", "lat"), crs =crs(highseas_raster))# Extract values from highseas rasterhighseas_values <- terra::extract(highseas_raster, grid_points)# Add highseas classification back to the dataframegrid_data$is_highseas <- highseas_values[, 2]grid_data$is_highseas[is.na(grid_data$is_highseas)] <-0# Set NA to 0 (continental)# Filter to keep only continental waters (is_highseas = 0)grid_data_continental <- grid_data %>%filter(is_highseas ==0)# Define the projectionmcbryde_thomas_2 <-"+proj=mbt_s"# Convert grid data to sf object and project (continental waters only)grid_data_sf <- grid_data_continental %>%st_as_sf(coords =c("lon", "lat"), crs =4326) %>%st_transform(crs = mcbryde_thomas_2)# Get and project landland_projected <-ne_countries(scale ="medium", returnclass ="sf") %>%st_transform(crs = mcbryde_thomas_2)# Create the globe borderglobe_bbox <-rbind(c(-180, -90), c(-180, 90), c(180, 90), c(180, -90), c(-180, -90))globe_border <-st_polygon(list(globe_bbox)) %>%st_sfc(crs =4326) %>%st_sf(data.frame(rgn ='globe', geom = .)) %>% smoothr::densify(max_distance =0.5) %>%st_transform(crs = mcbryde_thomas_2)# Create base thememy_theme <-theme_minimal() +theme(legend.position ="bottom",legend.direction ="horizontal",legend.box ="vertical",legend.margin =margin(t =20, r =0, b =0, l =0),legend.title =element_text(margin =margin(b =10)),panel.background =element_rect(fill ="white", color =NA),plot.background =element_rect(fill ="white", color =NA),panel.grid =element_blank(),axis.text =element_blank(), # Add this lineaxis.ticks =element_blank() # Add this line )# Calculate the data range for better color scalingmax_value <-max(grid_data_continental$mean_NT_coverage, na.rm =TRUE)color_breaks <-seq(0, ceiling(max_value /10) *10, by =2)# Map with built-in transformationnt_map_projected <-ggplot() +geom_sf(data = grid_data_sf, aes(color = mean_NT_coverage), size =0.5, alpha =0.7) +geom_sf(data = land_projected, fill ="lightgrey", color ="lightgrey", size =0.1) +geom_sf(data = globe_border, fill =NA, color ="black", size =0.5) +scale_color_viridis_c(option ="viridis",name ="Mean % range in no-take MPAs",trans ="sqrt", # Use "log10" for log scalebreaks = color_breaks,guide =guide_colorbar(barwidth =20, barheight =0.5, title.position ="top", title.hjust =0.5) ) +coord_sf() +labs(# title = "Mean Percentage of Species Range in No-Take MPAs",# subtitle = "Continental Waters Only (Square Root Scale)",x =NULL, y =NULL ) +# annotate("text", x = -Inf, y = Inf, label = "(A)", # hjust = -1, vjust = 2, size = 6, fontface = "bold") + my_theme# Print the projected mapprint(nt_map_projected)# Save the projected map (continental waters only)ggsave(here::here("Outputs","NT_MPA_coverage_continental.png"), nt_map_projected, width =10, height =7, dpi =300)# Compute statisticscontinental_stats <-data.frame(Statistic =c("Mean", "Max", "Min", "Median", "Standard Deviation"),Value =c(mean(grid_data_continental$mean_NT_coverage, na.rm =TRUE),max(grid_data_continental$mean_NT_coverage, na.rm =TRUE),min(grid_data_continental$mean_NT_coverage, na.rm =TRUE),median(grid_data_continental$mean_NT_coverage, na.rm =TRUE),sd(grid_data_continental$mean_NT_coverage, na.rm =TRUE) ))# Display as a formatted tablekable(continental_stats, caption ="Continental Waters Statistics", digits =2)```# Breakdown by realm, ecoregion and EEZ```{r}# Convert grid_data_continental to sf for spatial operationsgrid_sf <- grid_data_continental %>%st_as_sf(coords =c("lon", "lat"), crs =4326)#=====================# Calculate by Realm#=====================# Extract realms from meow_ecosrealms <- meow_ecos %>%group_by(REALM) %>%summarize(geometry =st_union(geometry)) %>%ungroup()# Spatial join with realmsgrid_realms <-st_join(grid_sf, realms)# Calculate mean coverage by realmrealm_stats <- grid_realms %>%st_drop_geometry() %>%group_by(REALM) %>%summarize(mean_NT_coverage =mean(mean_NT_coverage, na.rm =TRUE),median_NT_coverage =median(mean_NT_coverage, na.rm =TRUE),sd_NT_coverage =sd(mean_NT_coverage, na.rm =TRUE),n_cells =n() ) %>%arrange(desc(mean_NT_coverage))#======================# Calculate by Ecoregion#======================# Spatial join with ecoregionsgrid_ecoregions <-st_join(grid_sf, meow_ecos)# Calculate mean coverage by ecoregionecoregion_stats <- grid_ecoregions %>%st_drop_geometry() %>%group_by(ECOREGION) %>%summarize(PROVINCE =first(PROVINCE),REALM =first(REALM),mean_NT_coverage =mean(mean_NT_coverage, na.rm =TRUE),median_NT_coverage =median(mean_NT_coverage, na.rm =TRUE),sd_NT_coverage =sd(mean_NT_coverage, na.rm =TRUE),n_cells =n() ) %>%arrange(desc(mean_NT_coverage))#======================# Calculate by EEZ#======================# Fix potential invalid geometries in EEZ dataeez_valid <-st_make_valid(eez)# Spatial join with EEZgrid_eez <-st_join(grid_sf, eez_valid %>% dplyr::select(TERRITORY1, SOVEREIGN1, ISO_TER1, MRGID_TER1))# Calculate mean coverage by EEZeez_stats <- grid_eez %>%st_drop_geometry() %>%group_by(TERRITORY1, SOVEREIGN1) %>%summarize(ISO_TER1 =first(ISO_TER1),MRGID_TER1 =first(MRGID_TER1),mean_NT_coverage =mean(mean_NT_coverage, na.rm =TRUE),median_NT_coverage =median(mean_NT_coverage, na.rm =TRUE),sd_NT_coverage =sd(mean_NT_coverage, na.rm =TRUE),n_cells =n() ) %>%arrange(desc(mean_NT_coverage))# Print summary tablescat("Top 10 Realms by Mean No-Take MPA Coverage:\n")print(head(realm_stats, 10))cat("\nTop 10 Ecoregions by Mean No-Take MPA Coverage:\n")print(head(ecoregion_stats, 10))cat("\nTop 10 EEZs by Mean No-Take MPA Coverage:\n")print(head(eez_stats, 10))#======================# Create visualizations#======================# 1. Choropleth map of ecoregions# Join stats back to spatial datameow_ecos_stats <- meow_ecos %>%left_join(ecoregion_stats, by ="ECOREGION")# Plot ecoregions mapeco_map <-ggplot() +geom_sf(data = meow_ecos_stats, aes(fill = mean_NT_coverage), color =NA) +geom_sf(data =ne_countries(scale ="medium", returnclass ="sf"), fill ="lightgrey", color ="darkgrey", size =0.1) +scale_fill_viridis_c(name ="Mean % range in no-take MPAs",na.value ="grey90",trans ="sqrt",guide =guide_colorbar(barwidth =20, barheight =0.5,title.position ="top", title.hjust =0.5) ) +theme_minimal() +labs(#title = "Mean No-Take MPA Coverage by Marine Ecoregion",x =NULL, y =NULL ) +theme(legend.position ="bottom",panel.grid =element_blank(),axis.text =element_blank(),axis.ticks =element_blank() )# Save ecoregion mapggsave(here("Outputs", "ecoregion_NT_coverage_map.png"), eco_map, width =12, height =8, dpi =300)# 2. Choropleth map of EEZs# Join stats back to spatial dataeez_stats_map <- eez_valid %>%left_join(eez_stats, by =c("TERRITORY1", "SOVEREIGN1"))# Plot EEZ mapeez_map <-ggplot() +geom_sf(data = eez_stats_map, aes(fill = mean_NT_coverage), color =NA) +geom_sf(data =ne_countries(scale ="medium", returnclass ="sf"), fill ="lightgrey", color ="darkgrey", size =0.1) +scale_fill_viridis_c(name ="Mean % range in No-Take MPAs",na.value ="grey90",trans ="sqrt",guide =guide_colorbar(barwidth =20, barheight =0.5,title.position ="top", title.hjust =0.5) ) +theme_minimal() +labs(title ="Mean No-Take MPA Coverage by Exclusive Economic Zone (EEZ)",x =NULL, y =NULL ) +theme(legend.position ="bottom",panel.grid =element_blank() )# Save EEZ mapggsave(here("Outputs", "eez_NT_coverage_map.png"), eez_map, width =12, height =8, dpi =300)# 3. Bar plots for top regions# Top 20 Ecoregionstop_eco_plot <-ggplot(head(ecoregion_stats, 20), aes(x =reorder(ECOREGION, mean_NT_coverage), y = mean_NT_coverage)) +geom_bar(stat ="identity", fill ="steelblue") +geom_errorbar(aes(ymin = mean_NT_coverage - sd_NT_coverage, ymax = mean_NT_coverage + sd_NT_coverage), width =0.2) +coord_flip() +labs(# title = "Top 20 Marine Ecoregions by Mean No-Take MPA Coverage",x =NULL,y ="Mean % range in no-take MPAs" ) +theme_minimal() +theme(axis.text.y =element_text(size =8) )# Save top ecoregions plotggsave(here("Outputs", "top_ecoregions_NT_coverage.png"), top_eco_plot, width =10, height =8, dpi =300)# Bottom 20 Ecoregionsbottom_eco_plot <-ggplot(tail(ecoregion_stats, 20), aes(x =reorder(ECOREGION, mean_NT_coverage), y = mean_NT_coverage)) +geom_bar(stat ="identity", fill ="coral") +geom_errorbar(aes(ymin = mean_NT_coverage - sd_NT_coverage, ymax = mean_NT_coverage + sd_NT_coverage), width =0.2) +coord_flip() +labs(# title = "Bottom 20 Marine Ecoregions by Mean No-Take MPA Coverage",x =NULL,y ="Mean % range in no-take MPAs" ) +theme_minimal() +theme(axis.text.y =element_text(size =8) )# Save bottom ecoregions plotggsave(here("Outputs", "bottom_ecoregions_NT_coverage.png"), bottom_eco_plot, width =10, height =8, dpi =300)# Top 20 EEZstop_eez_plot <-ggplot(head(eez_stats, 20), aes(x =reorder(TERRITORY1, mean_NT_coverage), y = mean_NT_coverage)) +geom_bar(stat ="identity", fill ="steelblue") +geom_errorbar(aes(ymin = mean_NT_coverage - sd_NT_coverage, ymax = mean_NT_coverage + sd_NT_coverage), width =0.2) +coord_flip() +labs(title ="Top 20 EEZs by Mean No-Take MPA Coverage",x =NULL,y ="Mean % Range in No-Take MPAs" ) +theme_minimal() +theme(axis.text.y =element_text(size =8) )# Save top EEZs plotggsave(here("Outputs", "top_eezs_NT_coverage.png"), top_eez_plot, width =10, height =8, dpi =300)```# Null model of MPA placement within EEZs## Null model of MPA placement and overlaps with the range of sharks and rays```{r}# Create marine mask from bathymetry (areas < 0)marine_mask <- Bathy <0# Resample marine mask to match MPA raster resolutionmarine_mask_resampled <- raster::resample(marine_mask, mpa_NT, method ="ngb")# Ensure marine_mask_resampled is binary (0 or 1)marine_mask_resampled <- raster::reclassify(marine_mask_resampled, c(-Inf, 0.5, 0, 0.5, Inf, 1))# Function to create a raster mask of EEZs with unique values for each countrycreate_eez_mask <-function(template_raster) {# Ensure EEZ has a unique identifier for each country eez$country_id <-as.numeric(as.factor(eez$SOVEREIGN1))# Rasterize the EEZ shapefile using the country_id eez_raster <-rasterize(eez, template_raster, field ="country_id")return(eez_raster)}# Updated create_random_mpa function to randomize within each country's EEZcreate_random_mpa <-function(template_raster, marine_mask, eez_mask) {# Ensure template_raster is binary template_raster <- raster::reclassify(template_raster, c(-Inf, 0.5, 0, 0.5, Inf, 1))# Combine marine_mask and eez_mask combined_mask <- marine_mask * (eez_mask >0)# Get unique country IDs country_ids <-unique(eez_mask[!is.na(eez_mask)]) country_ids <- country_ids[country_ids >0]# Initialize random raster random_raster <- template_raster random_raster[] <-NAfor (country_id in country_ids) {# Create mask for current country country_mask <- eez_mask == country_id# Count original marine MPA cells within current country's EEZ original_mpa_cells <-sum(template_raster[] ==1& country_mask[] &!is.na(combined_mask[]), na.rm =TRUE)# Get all valid cells for current country (marine areas within EEZ) valid_cells <-which(country_mask[] &!is.na(combined_mask[])) total_valid_cells <-length(valid_cells)if (original_mpa_cells >0&& total_valid_cells >0) {if (original_mpa_cells > total_valid_cells) {warning(paste("More MPA cells than valid marine cells for country ID", country_id, ". Adjusting MPA cell count.")) original_mpa_cells <- total_valid_cells }# Randomly select valid cells to be MPAs for current country mpa_cells <-sample(valid_cells, size =min(original_mpa_cells, total_valid_cells), replace =FALSE)# Update random raster random_raster[valid_cells] <-0 random_raster[mpa_cells] <-1 } }return(random_raster)}# Calculate MPA percentage functioncalculate_mpa_percentage <-function(species_values, mpa_values) { total_cells <-sum(!is.na(species_values)) mpa_cells <-sum(mpa_values ==1&!is.na(species_values), na.rm =TRUE) percentage <- (mpa_cells / total_cells) *100return(percentage)}# Main analysis functionrun_random_mpa_analysis <-function(species_data, mpa_raster, marine_mask, eez_mask, n_iterations =100) {# Convert species dataframe to spatial points species_points <- sp::SpatialPointsDataFrame(coords = species_data[, c("lon", "lat")], data = species_data, proj4string = sp::CRS(raster::projection(mpa_raster)))# Prepare results storage species_columns <-4:(ncol(species_data) -1) # Adjust if needed all_results <-matrix(nrow =length(species_columns), ncol = n_iterations)rownames(all_results) <-names(species_data)[species_columns]# Use pblapply for parallel processing with a progress bar all_results <-pblapply(1:n_iterations, function(i) {set.seed(i) # Set seed for reproducibility random_mpa <-create_random_mpa(mpa_raster, marine_mask, eez_mask) random_mpa_values <- raster::extract(random_mpa, species_points)sapply(species_data[, species_columns], function(x) calculate_mpa_percentage(x, random_mpa_values)) }, cl =detectCores() -1) # Use one less than available cores all_results <-do.call(cbind, all_results)# Calculate mean and standard deviation for each species mean_percentages <-rowMeans(all_results, na.rm =TRUE) sd_percentages <-apply(all_results, 1, sd, na.rm =TRUE) results <-data.frame(species =rownames(all_results),mean_percentage = mean_percentages,sd_percentage = sd_percentages )return(results)}# Function to process resultsprocess_results <-function(results, mpa_type) {# Sort results by mean_percentage in descending order results <- results[order(-results$mean_percentage), ]# Calculate summary statistics summary_stats <-summary(results$mean_percentage)# Write results to CSVwrite.csv(results, here::here("Outputs",paste0("species_random_", mpa_type, "_coverage.csv")), row.names =FALSE)# Return a list containing the processed datareturn(list(top_10 =head(results, 10),summary_stats = summary_stats,all_results = results ))}# Create the EEZ mask with unique country IDseez_mask <-create_eez_mask(mpa_NT)# Run the analysis with the country-specific EEZ constrainttryCatch({ random_results_NT <-run_random_mpa_analysis(puvsp_marine, mpa_NT, marine_mask_resampled, eez_mask, n_iterations =100)print("Analysis for No-Take MPAs completed successfully.")}, error =function(e) {print(paste("Error in No-Take MPAs analysis:", e$message))})# Process results for both MPA typesprocessed_results <-list()if (exists("random_results_NT")) { processed_results[["No-take MPAs"]] <-process_results(random_results_NT, "No-take MPAs")}```# Compare results## Compare results between the MPA network and the Null model of MPA placement```{r fig.height=2.5}# Load actual MPA coverage dataactual_coverage <- read.csv(here::here("Data", "species_mpa_coverage_NT.csv"))colnames(actual_coverage)[2] <- c("NT")# Load random MPA coverage resultsrandom_NT <- read.csv(here::here("Outputs", "species_random_No-take MPAs_coverage.csv"))# Reshape actual coverage data to long format (NT only)actual_long <- actual_coverage %>% select(species, NT) %>% rename(actual_percentage = NT)# Function to merge and compare actual vs random datacompare_coverage <- function(actual_data, random_data) { comparison <- actual_data %>% left_join(random_data, by = "species") %>% mutate(difference = actual_percentage - mean_percentage, z_score = (actual_percentage - mean_percentage) / sd_percentage) return(comparison)}# Create comparison dataframe for NT MPAscomparison_NT <- compare_coverage(actual_long, random_NT)# Function to summarize and print resultssummarize_results <- function(comparison_data) { over_represented <- mean(comparison_data$difference > 0, na.rm = TRUE) * 100 under_represented <- mean(comparison_data$difference < 0, na.rm = TRUE) * 100 equally_represented <- 100 - over_represented - under_represented summary_df <- data.frame( Representation = c("Over-represented", "Under-represented", "Equally represented"), Percentage = round(c(over_represented, under_represented, equally_represented), 2) ) kable(summary_df, format = "html", col.names = c("Representation", "Percentage (%)"), caption = "Summary for No-Take MPAs") %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = FALSE) %>% column_spec(2, width = "100px") %>% row_spec(0, bold = TRUE) %>% add_header_above(c(" " = 1, "Species" = 1)) %>% footnote(general = "Percentages may not sum to 100% due to rounding.")}# Summarize resultssummarize_results(comparison_NT)# Identify significantly over/under-represented speciessignificant_species <- function(comparison_data, z_threshold = 1.96) { over_represented <- comparison_data %>% filter(z_score > z_threshold) %>% arrange(desc(z_score)) under_represented <- comparison_data %>% filter(z_score < -z_threshold) %>% arrange(z_score) top_10_over <- over_represented %>% dplyr::select(species, actual_percentage, mean_percentage, difference, z_score) %>% head(10) top_10_under <- under_represented %>% dplyr::select(species, actual_percentage, mean_percentage, difference, z_score) %>% head(10) combined_table <- bind_rows( top_10_over, top_10_under ) %>% mutate(across(where(is.numeric), ~round(., 2))) kable_output <- kable(combined_table, format = "html", col.names = c("Species", "Actual %", "Random %", "Difference", "SES"), caption = "Top 10 significantly over/under-represented species in No-Take MPAs") %>% kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"), full_width = FALSE) %>% column_spec(1, width = "200px") %>% column_spec(2:5, width = "100px") %>% row_spec(0, bold = TRUE) %>% pack_rows("Over-represented", 1, 10, label_row_css = "background-color: #e6f3ff; color: #000;") %>% pack_rows("Under-represented", 11, 20, label_row_css = "background-color: #fff0e6; color: #000;") footnote_text <- paste( "Total significantly over-represented species:", nrow(over_represented), "\n", "Total significantly under-represented species:", nrow(under_represented) ) kable_output <- kable_output %>% footnote(general = footnote_text, general_title = "Note:") # Save as PNG save_kable(kable_output, file = here::here("Outputs", "significant_species_NT.png"), zoom = 2) return(kable_output)}# Identify significant species and save as PNGsignificant_species(comparison_NT)# Stacked bar chart# Create a summary dataframe for plottingsummary_NT <- data.frame( Representation = c("Under-represented", "Equally represented", "Over-represented"), Percentage = c( mean(comparison_NT$difference < 0, na.rm = TRUE) * 100, 100 - mean(comparison_NT$difference > 0, na.rm = TRUE) * 100 - mean(comparison_NT$difference < 0, na.rm = TRUE) * 100, mean(comparison_NT$difference > 0, na.rm = TRUE) * 100 ))# Order the factor levels in reverse for proper stackingsummary_NT$Representation <- factor(summary_NT$Representation, levels = c("Over-represented", "Equally represented", "Under-represented"))# Create the horizontal stacked barplotstacked_bar_chart <- ggplot(summary_NT, aes(x = 1, y = Percentage, fill = Representation)) + geom_bar(stat = "identity", position = "stack", width = 0.5) + scale_fill_manual(values = c("Over-represented" = "#56B4E9", "Equally represented" = "#999999", "Under-represented" = "#E69F00"), breaks = c("Under-represented", "Equally represented", "Over-represented")) + geom_text(aes(label = sprintf("%.2f%%", Percentage)), position = position_stack(vjust = 0.5), color = "black", size = 4) + coord_flip() + theme_minimal() + labs( x = NULL, y = "Percentage of species (%)" ) + scale_y_continuous(expand = c(0, 0), limits = c(0, 100)) + theme( legend.position = "top", legend.title = element_blank(), axis.text.y = element_blank(), axis.ticks.y = element_blank(), panel.grid.major.y = element_blank(), panel.grid.minor.y = element_blank() )# Display the plotprint(stacked_bar_chart)```# Breakdown of under-represented species```{r}# Identify under-represented species (where actual coverage is less than random)under_represented_NT <- comparison_NT %>%filter(difference <0) %>%arrange(difference) %>% dplyr::select(species, actual_percentage, mean_percentage, difference, z_score)# Count the number of under-represented speciesn_under_NT <-nrow(under_represented_NT)# Save the list of under-represented species to CSV filewrite.csv(under_represented_NT, here::here("Outputs", "under_represented_species_NT_MPAs.csv"), row.names =FALSE)# Create summary data frame with additional informationunder_rep_NT_summary <- under_represented_NT %>%mutate(protection_gap = mean_percentage - actual_percentage,representation_ratio = actual_percentage / mean_percentage) %>%arrange(desc(protection_gap))# Save the enhanced summary datawrite.csv(under_rep_NT_summary, here::here("Outputs", "under_represented_species_NT_MPAs_summary.csv"), row.names =FALSE)## Compare with IUCN data # Define the function to prepare IUCN dataprepare_iucn_data <-function(species_data, species_col) {# Rename the species column in the input data to match species_data_renamed <- species_data %>% dplyr::rename(species =all_of(species_col))# Ensure we're only joining based on the species name species_data_slim <- species_data_renamed %>% dplyr::select(species) results_IUCN <-inner_join(IUCN, species_data_slim, by ="species") %>%mutate(IUCN_status =case_when( iucn_GE ==0~"LC", iucn_GE ==1~"NT", iucn_GE ==2~"VU", iucn_GE ==3~"EN", iucn_GE ==4~"CR",TRUE~"Unknown" )) %>%filter(IUCN_status !="Unknown")return(results_IUCN)}# Get species lists for each categoryall_species <- actual_coverage %>% dplyr::select(species)under_rep_NT_species <- under_represented_NT %>% dplyr::select(species)# Use the prepare_iucn_data function to get IUCN statuses for each group separatelyall_species_iucn <-prepare_iucn_data(all_species, "species")under_rep_NT_iucn <-prepare_iucn_data(under_rep_NT_species, "species")# Function to summarize IUCN status distributionsummarize_iucn <-function(data, title) {# Count species by IUCN status iucn_counts <- data %>%group_by(IUCN_status) %>%summarise(count =n(),.groups ="drop" ) %>%mutate(percentage = count /sum(count) *100,title = title )# Order IUCN statuses correctly iucn_counts$IUCN_status <-factor(iucn_counts$IUCN_status,levels =c("LC", "NT", "VU", "EN", "CR"))return(iucn_counts)}# Get IUCN summaries for each datasetall_iucn_summary <-summarize_iucn(all_species_iucn, "All Species")under_NT_iucn_summary <-summarize_iucn(under_rep_NT_iucn, "Under-represented (No-take MPAs)")# Print summaries to checkprint(all_iucn_summary)print(under_NT_iucn_summary)# Combine summariescombined_iucn_summary <-bind_rows( all_iucn_summary, under_NT_iucn_summary)# Create a bar plot comparing IUCN distributionsiucn_plot_NT <-ggplot(combined_iucn_summary, aes(x = IUCN_status, y = percentage, fill = title)) +geom_bar(stat ="identity", position ="dodge") +geom_text(aes(label =paste0(round(percentage, 2), "%")),position =position_dodge(width =0.9),vjust =-0.5,size =3) +scale_fill_manual(values =c("All Species"="grey70","Under-represented (No-take MPAs)"="darkred")) +labs(x ="IUCN Red List threat status",y ="Percentage of species (%)",fill ="" ) +theme_classic() +theme(legend.position ="bottom",panel.background =element_rect(fill ="white", color =NA),plot.background =element_rect(fill ="white", color =NA),plot.title =element_text(hjust =0.5, face ="bold"),plot.subtitle =element_text(hjust =0.5) )# Print the plotprint(iucn_plot_NT)# Save the plotggsave(here::here("Outputs", "under_represented_species_iucn_comparison_NT.png"), iucn_plot_NT, width =10, height =6, dpi =300)# Statistical test: Chi-square test to determine if there are significant differences# in IUCN status distribution between all species and under-represented species# Prepare contingency tablesprepare_contingency <-function(summary_df1, summary_df2) {# Ensure all IUCN categories are present in both datasets all_categories <-c("LC", "NT", "VU", "EN", "CR")# Fill in missing categories with zerofor (cat in all_categories) {if (!cat %in% summary_df1$IUCN_status) { summary_df1 <-bind_rows(summary_df1,data.frame(IUCN_status = cat, count =0,percentage =0, title =unique(summary_df1$title))) }if (!cat %in% summary_df2$IUCN_status) { summary_df2 <-bind_rows(summary_df2,data.frame(IUCN_status = cat, count =0,percentage =0, title =unique(summary_df2$title))) } }# Order and extract counts df1_ordered <- summary_df1 %>%arrange(factor(IUCN_status, levels = all_categories)) %>%pull(count) df2_ordered <- summary_df2 %>%arrange(factor(IUCN_status, levels = all_categories)) %>%pull(count)# Create contingency tablereturn(rbind(df1_ordered, df2_ordered))}# Perform chi-square testcontingency_NT <-prepare_contingency(all_iucn_summary, under_NT_iucn_summary)rownames(contingency_NT) <-c("All Species", "Under-represented (No-take MPAs)")colnames(contingency_NT) <-c("LC", "NT", "VU", "EN", "CR")# Print contingency tablecat("Contingency Table - No-take MPAs:\n")print(contingency_NT)cat("\n")# Run chi-square testchi_test_NT <-chisq.test(contingency_NT)# Print resultscat("Chi-square Test - No-take MPAs vs. All Species:\n")print(chi_test_NT)cat("\n")# Create a summary table with the IUCN distributioniucn_summary_table <- combined_iucn_summary %>%pivot_wider(id_cols = IUCN_status,names_from = title,values_from =c(count, percentage) ) %>%arrange(factor(IUCN_status, levels =c("LC", "NT", "VU", "EN", "CR")))# Save the summary tableswrite.csv(iucn_summary_table, here::here("Outputs", "under_represented_species_iucn_summary_NT.csv"),row.names =FALSE)```# Map results of the GAP analyses## Map mean Standardised Effect Sizes of MPA coverage```{r}# Get land polygons from rnaturalearthland <-ne_countries(scale ="medium", returnclass ="sf")# Create template rastertemplate_raster <-rast(ext(High_seas), resolution =0.5)# Rasterize high seas polygonhighseas_raster <-rasterize(vect(High_seas), template_raster, field =1, background =0)# Plot to verify#plot(highseas_raster,# main = "High Seas (1) vs Continental Waters (0)",# col = c("lightblue", "darkblue"))# For No-take MPAsdifference_sp <- comparison_NT[, c(1, 3, 6)]# Calculate cleaned SES for each species in comparison_NTcomparison_NT <- comparison_NT %>%mutate(SES_clean =case_when( sd_percentage ==0& actual_percentage == mean_percentage ~0, sd_percentage ==0& actual_percentage > mean_percentage ~3, # Cap at +3 sd_percentage ==0& actual_percentage < mean_percentage ~-3, # Cap at -3TRUE~ (actual_percentage - mean_percentage) / sd_percentage ))# Reshape puvsp_marine from wide to long formatpuvsp_long <- puvsp_marine %>%pivot_longer(cols =-c(id, lon, lat), names_to ="species", values_to ="presence") %>%filter(!is.na(presence) & presence ==1) # Only keep rows where species is present# Join SES data to puvsp_longcombined_data_clean <- puvsp_long %>%left_join(comparison_NT %>%select(species, SES_clean), by =c("species"="species"))# Calculate mean SES per cell with cleaned valuesmean_ses_per_cell_clean <- combined_data_clean %>%group_by(id, lon, lat) %>%summarise(mean_SES =mean(SES_clean, na.rm =TRUE), .groups ="drop")# Check the cleaned results#summary(mean_ses_per_cell_clean$mean_SES)# Convert to terra SpatVector for extractionses_points_NT <-vect(as.data.frame(mean_ses_per_cell_clean),geom =c("lon", "lat"),crs =crs(highseas_raster))# Extract values from highseas rasterhighseas_values_NT <- terra::extract(highseas_raster, ses_points_NT)# Add highseas classification back to the dataframemean_ses_per_cell_clean$is_highseas <- highseas_values_NT[, 2]mean_ses_per_cell_clean$is_highseas[is.na(mean_ses_per_cell_clean$is_highseas)] <-0# Set NA to 0 (continental)# Convert to sf for mappingmean_ses_sf_NT <-st_as_sf(mean_ses_per_cell_clean, coords =c("lon", "lat"), crs =4326)# Filter for continental waters onlycontinental_points_NT <- mean_ses_sf_NT[mean_ses_sf_NT$is_highseas ==0, ]# Define the projectionmcbryde_thomas_2 <-"+proj=mbt_s"# Project datasetscontinental_projected_NT <-st_transform(continental_points_NT, crs = mcbryde_thomas_2)land_projected <-ne_countries(scale ="medium", returnclass ="sf") %>%st_transform(crs = mcbryde_thomas_2)# Create the globe borderglobe_bbox <-rbind(c(-180, -90), c(-180, 90),c(180, 90), c(180, -90), c(-180, -90))globe_border <-st_polygon(list(globe_bbox)) %>%st_sfc(crs =4326) %>%st_sf(data.frame(rgn ='globe', geom = .)) %>% smoothr::densify(max_distance =0.5) %>%st_transform(crs = mcbryde_thomas_2)# Create base thememy_theme <-theme_minimal() +theme(legend.position ="bottom",legend.direction ="horizontal",legend.box ="vertical",legend.margin =margin(t =20, r =0, b =0, l =0),legend.title =element_text(margin =margin(b =10)),panel.background =element_rect(fill ="white", color =NA),plot.background =element_rect(fill ="white", color =NA),panel.grid =element_blank(),axis.text =element_blank(),axis.ticks =element_blank() )# Create plot for continental waters - No-take MPAs with McBryde-Thomas projectioncontinental_plot_NT <-ggplot() +geom_sf(data = continental_projected_NT, aes(color = mean_SES), size =0.5, alpha =0.7) +geom_sf(data = land_projected, fill ="lightgrey", color ="lightgrey", size =0.1) +geom_sf(data = globe_border, fill =NA, color ="black", size =0.5) +scale_color_gradient2(low ="#b2182b", mid ="#f0f0f0", high ="#2166ac", midpoint =0,# Note: The color scale is manually limited to [-3, 3] to improve visual contrast.# Values outside this range are squished to the nearest limit (using oob = scales::squish),# which prevents extreme SES values (e.g., -10 or +5) from dominating the color scale.limits =c(-3, 3),oob = scales::squish,name ="Mean SES",guide =guide_colorbar(barwidth =20, barheight =0.5,title.position ="top", title.hjust =0.5) ) +coord_sf() +labs(x =NULL, y =NULL ) + my_theme# Print the plotprint(continental_plot_NT)# Save the plotggsave(here::here("Outputs", "SES_map_continental_NT.png"), continental_plot_NT,width =10,height =6,dpi =300,bg ="white")#======================# Calculate SES by Ecoregion#======================# Convert mean_ses_per_cell to sf for spatial operationsses_grid_sf <- mean_ses_per_cell_clean %>%st_as_sf(coords =c("lon", "lat"), crs =4326)# Spatial join with ecoregionsses_grid_ecoregions <-st_join(ses_grid_sf, meow_ecos)# Calculate mean SES by ecoregion (excluding areas outside ecoregions)ecoregion_ses_stats <- ses_grid_ecoregions %>%st_drop_geometry() %>%filter(!is.na(ECOREGION)) %>%# Exclude areas outside ecoregionsgroup_by(ECOREGION) %>%summarize(PROVINCE =first(PROVINCE),REALM =first(REALM),mean_SES =mean(mean_SES, na.rm =TRUE),median_SES =median(mean_SES, na.rm =TRUE),sd_SES =sd(mean_SES, na.rm =TRUE),n_cells =n(),.groups ="drop" ) %>%arrange(desc(mean_SES))# Print summarycat("Top 10 Ecoregions by Mean SES:\n")print(head(ecoregion_ses_stats, 10))cat("\nBottom 10 Ecoregions by Mean SES:\n")print(tail(ecoregion_ses_stats, 10))#======================# Create SES ecoregion map#======================# Join SES stats back to spatial datameow_ecos_ses_stats <- meow_ecos %>%left_join(ecoregion_ses_stats, by ="ECOREGION")# Plot ecoregions SES mapses_eco_map <-ggplot() +geom_sf(data = meow_ecos_ses_stats, aes(fill = mean_SES), color =NA) +geom_sf(data = land, fill ="darkgrey", color ="darkgrey", size =0.1) +scale_fill_gradient2(name ="Mean SES",low ="#b2182b", mid ="#f0f0f0", high ="#2166ac", midpoint =0,na.value ="grey90",guide =guide_colorbar(barwidth =20, barheight =0.5,title.position ="top", title.hjust =0.5) ) +coord_sf(xlim =c(-180, 180), ylim =c(-90, 90), expand =FALSE) +theme_minimal() +labs(x =NULL, y =NULL ) +theme(legend.position ="bottom",panel.grid =element_blank(),axis.text =element_blank(),axis.ticks =element_blank() )# Print and save the mapprint(ses_eco_map)ggsave(here("Outputs", "ecoregion_SES_map_NT_clean.png"), ses_eco_map, width =12, height =8, dpi =300)# Top 20 Ecoregions by Mean SEStop_ses_plot <-ggplot(head(ecoregion_ses_stats, 20),aes(x =reorder(ECOREGION, mean_SES), y = mean_SES)) +geom_bar(stat ="identity", fill ="steelblue") +geom_errorbar(aes(ymin = mean_SES - sd_SES,ymax = mean_SES + sd_SES),width =0.2) +coord_flip() +labs(x =NULL,y ="Mean SES" ) +theme_minimal() +theme(axis.text.y =element_text(size =8) )# Bottom 20 Ecoregions by Mean SESbottom_ses_plot <-ggplot(tail(ecoregion_ses_stats, 20),aes(x =reorder(ECOREGION, mean_SES), y = mean_SES)) +geom_bar(stat ="identity", fill ="coral") +geom_errorbar(aes(ymin = mean_SES - sd_SES,ymax = mean_SES + sd_SES),width =0.2) +coord_flip() +labs(x =NULL,y ="Mean SES" ) +theme_minimal() +theme(axis.text.y =element_text(size =8) )# Print and save plotsprint(top_ses_plot)ggsave(here("Outputs", "top_ecoregions_SES_NT_clean.png"), top_ses_plot, width =10, height =8, dpi =300)print(bottom_ses_plot)ggsave(here("Outputs", "bottom_ecoregions_SES_NT_clean.png"), bottom_ses_plot, width =10, height =8, dpi =300)# Save ecoregion statisticswrite.csv(ecoregion_ses_stats, here::here("Outputs", "ecoregion_SES_stats_NT.csv"),row.names =FALSE)```# Relate to IUCN status## Relate the percentage of species range overlapped by MPAs to the IUCN Red List status with betaregression analysis```{r}# Join IUCN and results dataresults_IUCN <-left_join(IUCN, actual_coverage, by =c("species"))# Add IUCN status categoriesresults_IUCN <- results_IUCN %>%mutate(IUCN_status =case_when( iucn_GE ==0~"LC", iucn_GE ==1~"NT", iucn_GE ==2~"VU", iucn_GE ==3~"EN", iucn_GE ==4~"CR",TRUE~"Unknown" ))# Filter out the "Unknown" categoryresults_IUCN_filtered <- results_IUCN %>%filter(IUCN_status !="Unknown")#======================# Beta Regression for No-take MPAs#======================# Transform percentage to (0,1) with boundary correction for no-take MPAsresults_IUCN_filtered <- results_IUCN_filtered %>%mutate(n_obs =n(),mpa_coverage_beta_nt = (NT/100* (n_obs -1) +0.5) / n_obs )# Fit beta regression for no-take MPAsmpa_beta_nt <-betareg(mpa_coverage_beta_nt ~ iucn_GE, data = results_IUCN_filtered)summary(mpa_beta_nt)# Get marginal effectsmpa_margins_nt <-margins(mpa_beta_nt)summary(mpa_margins_nt)# Get predicted values by threat categorympa_pred_nt <-predict(mpa_beta_nt,newdata =data.frame(iucn_GE =0:4),type ="response") *100print("Predicted No-take MPA coverage by IUCN category:")print(data.frame(Category =0:4,IUCN =c("LC", "NT", "VU", "EN", "CR"),Predicted_Coverage =round(mpa_pred_nt, 2)))# Average decrease per categoryavg_decrease_nt <-mean(diff(mpa_pred_nt))print(paste("Average decrease per threat category:", round(avg_decrease_nt, 2), "%"))#======================# Ridge Plot with Points - No-take MPAs#======================# Define IUCN colors and orderiucn_colors <-c("CR"="#D81E05","EN"="#FC7F3F","VU"="#FEC748","NT"="#58AFFF","LC"="#38AB38")iucn_order <-c("LC", "NT", "VU", "EN", "CR")# Prepare no-take MPA data for plottingnotake_mpa_iucn <- results_IUCN_filtered %>% dplyr::select(species, IUCN_status, NT) %>%rename(protection = NT)# Create the ridge plot with colored pointsrdplot_points <-ggplot(notake_mpa_iucn, aes(x =factor(IUCN_status, levels = iucn_order), y = protection)) +geom_jitter(aes(color = IUCN_status),width =0.1, size =1.5, alpha =0.6) +# Mean and SD statisticsstat_summary(fun = mean, geom ="point", size =3, color ="black") +stat_summary(fun.data =function(x) { mean_val <-mean(x, na.rm =TRUE) sd_val <-sd(x, na.rm =TRUE)return(data.frame(y = mean_val, ymin =max(0, mean_val - sd_val),ymax = mean_val + sd_val)) },geom ="errorbar", width =0.1, color ="black") +labs(x ="IUCN Red List threat status", y ="(%) Range within no-take MPAs") +theme_minimal() +theme(plot.title =element_text(hjust =0.5, face ="bold"),legend.position ="none",panel.grid.major.x =element_blank() ) +scale_x_discrete(limits = iucn_order) +scale_color_manual(values = iucn_colors) +scale_y_sqrt(limits =c(0, 50),breaks =seq(0, 50, 10),labels =c("0", "10", "20", "30", "40", ""))print(rdplot_points)# Add white background to the plotrdplot_points_white <- rdplot_points +theme(plot.background =element_rect(fill ="white", color =NA),panel.background =element_rect(fill ="white", color =NA) )# Save PNG with white backgroundggsave(filename = here::here("Outputs", "notake_mpa_protection_points.png"),plot = rdplot_points_white,width =6,height =5,dpi =300,bg ="white")# Summary statistics for no-take MPAssummary_stats_nt <- results_IUCN_filtered %>%group_by(IUCN_status) %>%summarise(count =n(),mean =mean(NT, na.rm =TRUE),median =median(NT, na.rm =TRUE),sd =sd(NT, na.rm =TRUE),.groups ="drop" ) %>%arrange(factor(IUCN_status, levels = iucn_order))print("Summary statistics for No-take MPAs:")print(summary_stats_nt)```# Grid of Fig. 1```{r}final_plot_combined = ggpubr::ggarrange( rdplot_points_white, nt_map_projected, stacked_bar_chart, continental_plot_NT,nrow =4,ncol =1, labels =c("(A)", "(B)", "(C)", "(D)"),heights =c(0.8, 1.5, 0.4, 1.5), font.label =list(size =16, face ="bold"),vjust =1,hjust =-0.1)print(final_plot_combined)# Define the output path using `here`output_path <- here::here("Outputs", "Fig_1_combined_points_3.png")# Save the final plot using `ggsave`ggsave(filename = output_path, # File path for savingplot = final_plot_combined, # The combined plotwidth =8, # Width of the output (in inches)height =17.35, # Height of the output (in inches)dpi =300, # Resolution (300 DPI for high quality)bg ="white", )# Display the saved image in the Quarto documentknitr::include_graphics(here::here("Outputs", "Fig_1_combined_points_3.png"))```